import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import shapiro
from statsmodels.stats.outliers_influence import variance_inflation_factor
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.tree import plot_tree, DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score
from sklearn.svm import SVC
df=pd.read_csv(r"D:\Excel project\energy_dataset_.csv")
df
| Type_of_Renewable_Energy | Installed_Capacity_MW | Energy_Production_MWh | Energy_Consumption_MWh | Energy_Storage_Capacity_MWh | Storage_Efficiency_Percentage | Grid_Integration_Level | Initial_Investment_USD | Funding_Sources | Financial_Incentives_USD | GHG_Emission_Reduction_tCO2e | Air_Pollution_Reduction_Index | Jobs_Created | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 4 | 93.423205 | 103853.2206 | 248708.4892 | 2953.248771 | 89.887562 | 4 | 4.732248e+08 | 1 | 9.207772e+06 | 6663.816572 | 81.742461 | 1366 |
| 1 | 4 | 590.468942 | 190223.0649 | 166104.1642 | 5305.174042 | 84.403343 | 4 | 1.670697e+08 | 2 | 1.685101e+06 | 30656.049820 | 78.139042 | 1743 |
| 2 | 1 | 625.951142 | 266023.4824 | 424114.6308 | 2620.192622 | 60.498249 | 2 | 8.463610e+07 | 2 | 5.111813e+06 | 1749.613759 | 8.461296 | 363 |
| 3 | 1 | 779.998728 | 487039.5296 | 308337.7316 | 1925.250307 | 86.897861 | 3 | 3.967690e+08 | 2 | 4.805902e+06 | 43233.237820 | 8.402441 | 2821 |
| 4 | 3 | 242.106837 | 482815.0856 | 360437.7705 | 3948.945383 | 70.949351 | 2 | 3.574413e+07 | 1 | 1.668601e+07 | 14858.662760 | 28.822867 | 2583 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 14995 | 3 | 745.032555 | 280007.5738 | 230544.8268 | 4351.687893 | 90.791405 | 4 | 3.484136e+08 | 2 | 1.558508e+07 | 25234.911810 | 78.923200 | 1452 |
| 14996 | 1 | 15.187023 | 377340.5803 | 358547.3589 | 6792.194696 | 78.252040 | 4 | 2.560179e+08 | 3 | 6.866618e+06 | 15762.519790 | 54.982974 | 2598 |
| 14997 | 3 | 877.539059 | 480497.3920 | 214441.6719 | 4588.725297 | 58.282928 | 1 | 1.300112e+08 | 2 | 3.837764e+06 | 44597.809410 | 43.915897 | 2713 |
| 14998 | 7 | 551.264716 | 436383.1694 | 137043.8713 | 7251.144215 | 73.573666 | 2 | 3.334831e+08 | 2 | 5.347706e+06 | 34363.858000 | 4.877145 | 2128 |
| 14999 | 3 | 863.421803 | 314014.3005 | 124461.9178 | 7366.166362 | 86.868893 | 4 | 2.989825e+08 | 2 | 1.832534e+07 | 27193.217600 | 50.559075 | 1871 |
15000 rows × 13 columns
df.size
195000
df.shape
(15000, 13)
df.axes
[RangeIndex(start=0, stop=15000, step=1),
Index(['Type_of_Renewable_Energy', 'Installed_Capacity_MW',
'Energy_Production_MWh', 'Energy_Consumption_MWh',
'Energy_Storage_Capacity_MWh', 'Storage_Efficiency_Percentage',
'Grid_Integration_Level', 'Initial_Investment_USD', 'Funding_Sources',
'Financial_Incentives_USD', 'GHG_Emission_Reduction_tCO2e',
'Air_Pollution_Reduction_Index', 'Jobs_Created'],
dtype='object')]
df.columns
Index(['Type_of_Renewable_Energy', 'Installed_Capacity_MW',
'Energy_Production_MWh', 'Energy_Consumption_MWh',
'Energy_Storage_Capacity_MWh', 'Storage_Efficiency_Percentage',
'Grid_Integration_Level', 'Initial_Investment_USD', 'Funding_Sources',
'Financial_Incentives_USD', 'GHG_Emission_Reduction_tCO2e',
'Air_Pollution_Reduction_Index', 'Jobs_Created'],
dtype='object')
df.describe()
| Type_of_Renewable_Energy | Installed_Capacity_MW | Energy_Production_MWh | Energy_Consumption_MWh | Energy_Storage_Capacity_MWh | Storage_Efficiency_Percentage | Grid_Integration_Level | Initial_Investment_USD | Funding_Sources | Financial_Incentives_USD | GHG_Emission_Reduction_tCO2e | Air_Pollution_Reduction_Index | Jobs_Created | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 15000.000000 | 15000.000000 | 15000.000000 | 15000.000000 | 15000.000000 | 15000.000000 | 15000.000000 | 1.500000e+04 | 15000.000000 | 1.500000e+04 | 15000.000000 | 15000.000000 | 15000.000000 |
| mean | 3.973933 | 495.855747 | 252350.955621 | 225981.854966 | 5030.196472 | 75.219334 | 2.501267 | 2.514849e+08 | 2.004000 | 1.002977e+07 | 25234.722158 | 50.724179 | 2502.668600 |
| std | 1.999380 | 288.212872 | 144062.915425 | 129223.677997 | 2894.096326 | 14.485289 | 1.123306 | 1.432843e+08 | 0.817493 | 5.787303e+06 | 14378.915277 | 28.556578 | 1451.212661 |
| min | 1.000000 | 1.091767 | 1030.103692 | 584.048006 | 2.200208 | 50.003494 | 1.000000 | 1.008107e+06 | 1.000000 | 5.155842e+04 | 100.974460 | 1.009712 | 10.000000 |
| 25% | 2.000000 | 245.475737 | 128568.875900 | 115587.428225 | 2543.341380 | 62.694076 | 1.000000 | 1.259709e+08 | 1.000000 | 4.963454e+06 | 12754.476927 | 26.224426 | 1228.000000 |
| 50% | 4.000000 | 492.907555 | 253216.871250 | 225226.374350 | 5054.036248 | 75.279702 | 3.000000 | 2.539910e+08 | 2.000000 | 1.002414e+07 | 25424.477000 | 50.250207 | 2496.000000 |
| 75% | 6.000000 | 742.254682 | 377083.805575 | 338656.214700 | 7536.935405 | 87.754318 | 4.000000 | 3.759130e+08 | 3.000000 | 1.507417e+07 | 37750.426285 | 75.720133 | 3765.000000 |
| max | 7.000000 | 999.982979 | 499991.200400 | 449922.667800 | 9999.145037 | 99.994955 | 4.000000 | 4.999407e+08 | 3.000000 | 1.999855e+07 | 49997.578530 | 99.980494 | 4999.000000 |
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 15000 entries, 0 to 14999 Data columns (total 13 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Type_of_Renewable_Energy 15000 non-null int64 1 Installed_Capacity_MW 15000 non-null float64 2 Energy_Production_MWh 15000 non-null float64 3 Energy_Consumption_MWh 15000 non-null float64 4 Energy_Storage_Capacity_MWh 15000 non-null float64 5 Storage_Efficiency_Percentage 15000 non-null float64 6 Grid_Integration_Level 15000 non-null int64 7 Initial_Investment_USD 15000 non-null float64 8 Funding_Sources 15000 non-null int64 9 Financial_Incentives_USD 15000 non-null float64 10 GHG_Emission_Reduction_tCO2e 15000 non-null float64 11 Air_Pollution_Reduction_Index 15000 non-null float64 12 Jobs_Created 15000 non-null int64 dtypes: float64(9), int64(4) memory usage: 1.5 MB
df.isna().sum()
Type_of_Renewable_Energy 0 Installed_Capacity_MW 0 Energy_Production_MWh 0 Energy_Consumption_MWh 0 Energy_Storage_Capacity_MWh 0 Storage_Efficiency_Percentage 0 Grid_Integration_Level 0 Initial_Investment_USD 0 Funding_Sources 0 Financial_Incentives_USD 0 GHG_Emission_Reduction_tCO2e 0 Air_Pollution_Reduction_Index 0 Jobs_Created 0 dtype: int64
df.dtypes
Type_of_Renewable_Energy int64 Installed_Capacity_MW float64 Energy_Production_MWh float64 Energy_Consumption_MWh float64 Energy_Storage_Capacity_MWh float64 Storage_Efficiency_Percentage float64 Grid_Integration_Level int64 Initial_Investment_USD float64 Funding_Sources int64 Financial_Incentives_USD float64 GHG_Emission_Reduction_tCO2e float64 Air_Pollution_Reduction_Index float64 Jobs_Created int64 dtype: object
plt.figure(figsize=(10, 6))
sns.heatmap(df.corr(),annot=True,cmap='rainbow')
<Axes: >
df.var()
Type_of_Renewable_Energy 3.997520e+00 Installed_Capacity_MW 8.306666e+04 Energy_Production_MWh 2.075412e+10 Energy_Consumption_MWh 1.669876e+10 Energy_Storage_Capacity_MWh 8.375794e+06 Storage_Efficiency_Percentage 2.098236e+02 Grid_Integration_Level 1.261816e+00 Initial_Investment_USD 2.053038e+16 Funding_Sources 6.682952e-01 Financial_Incentives_USD 3.349287e+13 GHG_Emission_Reduction_tCO2e 2.067532e+08 Air_Pollution_Reduction_Index 8.154781e+02 Jobs_Created 2.106018e+06 dtype: float64
df.cov()
| Type_of_Renewable_Energy | Installed_Capacity_MW | Energy_Production_MWh | Energy_Consumption_MWh | Energy_Storage_Capacity_MWh | Storage_Efficiency_Percentage | Grid_Integration_Level | Initial_Investment_USD | Funding_Sources | Financial_Incentives_USD | GHG_Emission_Reduction_tCO2e | Air_Pollution_Reduction_Index | Jobs_Created | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Type_of_Renewable_Energy | 3.997520e+00 | -3.625395e-01 | 1.215235e+03 | -3.220774e+02 | 1.917457e+01 | -1.123573e-01 | -0.014068 | -4.578642e+06 | -0.010630 | -2.063998e+04 | 4.161362e+02 | -4.239348e-01 | 4.744598e-01 |
| Installed_Capacity_MW | -3.625395e-01 | 8.306666e+04 | -4.782348e+05 | 2.477787e+05 | 9.589998e+03 | 1.914227e+00 | -3.490605 | 2.676324e+08 | -0.499094 | 5.681402e+06 | 5.716652e+04 | -6.180542e+01 | -5.577267e+02 |
| Energy_Production_MWh | 1.215235e+03 | -4.782348e+05 | 2.075412e+10 | -6.972231e+06 | -1.747641e+06 | 5.013188e+02 | -417.277793 | -1.254754e+11 | -1267.525076 | 1.062634e+09 | -8.600616e+06 | -3.206321e+04 | -4.846938e+05 |
| Energy_Consumption_MWh | -3.220774e+02 | 2.477787e+05 | -6.972231e+06 | 1.669876e+10 | -4.565008e+05 | 1.133853e+04 | -397.011971 | -3.983686e+11 | -311.265800 | 7.976445e+09 | 1.898074e+07 | 1.103052e+04 | 1.865765e+06 |
| Energy_Storage_Capacity_MWh | 1.917457e+01 | 9.589998e+03 | -1.747641e+06 | -4.565008e+05 | 8.375794e+06 | 4.318747e+02 | -24.887617 | 1.163868e+09 | 18.430238 | 2.370717e+08 | -7.279311e+04 | -5.027205e+02 | 4.037827e+03 |
| Storage_Efficiency_Percentage | -1.123573e-01 | 1.914227e+00 | 5.013188e+02 | 1.133853e+04 | 4.318747e+02 | 2.098236e+02 | 0.311152 | 7.897642e+06 | -0.049555 | 2.316351e+04 | 1.381326e+03 | 3.198205e-01 | 2.009514e+02 |
| Grid_Integration_Level | -1.406792e-02 | -3.490605e+00 | -4.172778e+02 | -3.970120e+02 | -2.488762e+01 | 3.111520e-01 | 1.261816 | -2.401866e+05 | 0.007995 | -1.469872e+04 | 2.196670e+00 | 1.425433e-01 | 1.336476e+00 |
| Initial_Investment_USD | -4.578642e+06 | 2.676324e+08 | -1.254754e+11 | -3.983686e+11 | 1.163868e+09 | 7.897642e+06 | -240186.628659 | 2.053038e+16 | -473835.832677 | 1.840326e+12 | -2.267804e+10 | -7.335563e+06 | -4.683964e+08 |
| Funding_Sources | -1.062978e-02 | -4.990944e-01 | -1.267525e+03 | -3.112658e+02 | 1.843024e+01 | -4.955450e-02 | 0.007995 | -4.738358e+05 | 0.668295 | -1.125168e+04 | 1.344563e+01 | -5.655922e-02 | 1.443289e+01 |
| Financial_Incentives_USD | -2.063998e+04 | 5.681402e+06 | 1.062634e+09 | 7.976445e+09 | 2.370717e+08 | 2.316351e+04 | -14698.721965 | 1.840326e+12 | -11251.676358 | 3.349287e+13 | 3.112938e+08 | -6.781600e+05 | -1.404093e+08 |
| GHG_Emission_Reduction_tCO2e | 4.161362e+02 | 5.716652e+04 | -8.600616e+06 | 1.898074e+07 | -7.279311e+04 | 1.381326e+03 | 2.196670 | -2.267804e+10 | 13.445634 | 3.112938e+08 | 2.067532e+08 | -4.218332e+03 | 2.538464e+04 |
| Air_Pollution_Reduction_Index | -4.239348e-01 | -6.180542e+01 | -3.206321e+04 | 1.103052e+04 | -5.027205e+02 | 3.198205e-01 | 0.142543 | -7.335563e+06 | -0.056559 | -6.781600e+05 | -4.218332e+03 | 8.154781e+02 | 1.882566e+02 |
| Jobs_Created | 4.744598e-01 | -5.577267e+02 | -4.846938e+05 | 1.865765e+06 | 4.037827e+03 | 2.009514e+02 | 1.336476 | -4.683964e+08 | 14.432888 | -1.404093e+08 | 2.538464e+04 | 1.882566e+02 | 2.106018e+06 |
df.std()
Type_of_Renewable_Energy 1.999380e+00 Installed_Capacity_MW 2.882129e+02 Energy_Production_MWh 1.440629e+05 Energy_Consumption_MWh 1.292237e+05 Energy_Storage_Capacity_MWh 2.894096e+03 Storage_Efficiency_Percentage 1.448529e+01 Grid_Integration_Level 1.123306e+00 Initial_Investment_USD 1.432843e+08 Funding_Sources 8.174933e-01 Financial_Incentives_USD 5.787303e+06 GHG_Emission_Reduction_tCO2e 1.437892e+04 Air_Pollution_Reduction_Index 2.855658e+01 Jobs_Created 1.451213e+03 dtype: float64
sns.pairplot(df)
plt.title('Jobs_Created')
plt.show()
plt.scatter(df['Type_of_Renewable_Energy'], df['Jobs_Created'])
plt.show()
sns.boxplot(df["Type_of_Renewable_Energy"])
<Axes: >
sns.boxplot(df["Installed_Capacity_MW"])
<Axes: >
sns.boxplot(df["Installed_Capacity_MW"])
<Axes: >
sns.boxplot(df["Energy_Production_MWh"])
<Axes: >
sns.boxplot(df["Energy_Consumption_MWh"])
<Axes: >
sns.boxplot(df["Energy_Storage_Capacity_MWh"])
<Axes: >
sns.boxplot(df["Storage_Efficiency_Percentage"])
<Axes: >
sns.boxplot(df["Grid_Integration_Level"])
<Axes: >
sns.boxplot(df["Initial_Investment_USD"])
<Axes: >
sns.boxplot(df["Funding_Sources"])
<Axes: >
sns.boxplot(df["Financial_Incentives_USD"])
<Axes: >
df.shape
(15000, 13)
df1 = df.iloc[:, :12]
df1
| Type_of_Renewable_Energy | Installed_Capacity_MW | Energy_Production_MWh | Energy_Consumption_MWh | Energy_Storage_Capacity_MWh | Storage_Efficiency_Percentage | Grid_Integration_Level | Initial_Investment_USD | Funding_Sources | Financial_Incentives_USD | GHG_Emission_Reduction_tCO2e | Air_Pollution_Reduction_Index | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 4 | 93.423205 | 103853.2206 | 248708.4892 | 2953.248771 | 89.887562 | 4 | 4.732248e+08 | 1 | 9.207772e+06 | 6663.816572 | 81.742461 |
| 1 | 4 | 590.468942 | 190223.0649 | 166104.1642 | 5305.174042 | 84.403343 | 4 | 1.670697e+08 | 2 | 1.685101e+06 | 30656.049820 | 78.139042 |
| 2 | 1 | 625.951142 | 266023.4824 | 424114.6308 | 2620.192622 | 60.498249 | 2 | 8.463610e+07 | 2 | 5.111813e+06 | 1749.613759 | 8.461296 |
| 3 | 1 | 779.998728 | 487039.5296 | 308337.7316 | 1925.250307 | 86.897861 | 3 | 3.967690e+08 | 2 | 4.805902e+06 | 43233.237820 | 8.402441 |
| 4 | 3 | 242.106837 | 482815.0856 | 360437.7705 | 3948.945383 | 70.949351 | 2 | 3.574413e+07 | 1 | 1.668601e+07 | 14858.662760 | 28.822867 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 14995 | 3 | 745.032555 | 280007.5738 | 230544.8268 | 4351.687893 | 90.791405 | 4 | 3.484136e+08 | 2 | 1.558508e+07 | 25234.911810 | 78.923200 |
| 14996 | 1 | 15.187023 | 377340.5803 | 358547.3589 | 6792.194696 | 78.252040 | 4 | 2.560179e+08 | 3 | 6.866618e+06 | 15762.519790 | 54.982974 |
| 14997 | 3 | 877.539059 | 480497.3920 | 214441.6719 | 4588.725297 | 58.282928 | 1 | 1.300112e+08 | 2 | 3.837764e+06 | 44597.809410 | 43.915897 |
| 14998 | 7 | 551.264716 | 436383.1694 | 137043.8713 | 7251.144215 | 73.573666 | 2 | 3.334831e+08 | 2 | 5.347706e+06 | 34363.858000 | 4.877145 |
| 14999 | 3 | 863.421803 | 314014.3005 | 124461.9178 | 7366.166362 | 86.868893 | 4 | 2.989825e+08 | 2 | 1.832534e+07 | 27193.217600 | 50.559075 |
15000 rows × 12 columns
vif_df = pd.DataFrame()
vif_df["independant Features"] = df1.columns
vif_df
| independant Features | |
|---|---|
| 0 | Type_of_Renewable_Energy |
| 1 | Installed_Capacity_MW |
| 2 | Energy_Production_MWh |
| 3 | Energy_Consumption_MWh |
| 4 | Energy_Storage_Capacity_MWh |
| 5 | Storage_Efficiency_Percentage |
| 6 | Grid_Integration_Level |
| 7 | Initial_Investment_USD |
| 8 | Funding_Sources |
| 9 | Financial_Incentives_USD |
| 10 | GHG_Emission_Reduction_tCO2e |
| 11 | Air_Pollution_Reduction_Index |
vif_list = []
for i in range(df1.shape[1]):
vif = variance_inflation_factor(df1.to_numpy(),i)
vif_list.append(vif)
vif_df["VIF"] = vif_list
vif_df
| independant Features | VIF | |
|---|---|---|
| 0 | Type_of_Renewable_Energy | 4.643886 |
| 1 | Installed_Capacity_MW | 3.789933 |
| 2 | Energy_Production_MWh | 3.866499 |
| 3 | Energy_Consumption_MWh | 3.881722 |
| 4 | Energy_Storage_Capacity_MWh | 3.860215 |
| 5 | Storage_Efficiency_Percentage | 17.040044 |
| 6 | Grid_Integration_Level | 5.548451 |
| 7 | Initial_Investment_USD | 3.880253 |
| 8 | Funding_Sources | 6.360197 |
| 9 | Financial_Incentives_USD | 3.833911 |
| 10 | GHG_Emission_Reduction_tCO2e | 3.909498 |
| 11 | Air_Pollution_Reduction_Index | 3.946135 |
df.shape
(15000, 13)
x = df1
y = df["Jobs_Created"]
xtrain,xtest,ytrain,ytest = train_test_split(x, y, test_size=0.3, random_state=11)
xtrain.shape
(10500, 12)
ytrain.shape
(10500,)
xtest.shape
(4500, 12)
ytest.shape
(4500,)
log_reg = LogisticRegression()
log_reg
LogisticRegression()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
LogisticRegression()
log_reg_model = log_reg.fit(xtrain, ytrain)
log_reg_model
C:\Users\prajw\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
LogisticRegression()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
LogisticRegression()
ytrain.head(10)
14314 4050 2415 2670 8169 1737 12199 1631 2737 3030 12032 3382 12452 148 861 4120 4661 2168 8245 4360 Name: Jobs_Created, dtype: int64
ytrain_pred = log_reg_model.predict(xtrain)
ytrain_pred
array([1045, 4888, 1045, ..., 1045, 4054, 3470], dtype=int64)
ytrain_pred = log_reg_model.predict(xtrain)
mse = mean_squared_error(ytrain,ytrain_pred)
print(f"mean squared error : {mse}")
mae = mean_absolute_error(ytrain,ytrain_pred)
print(f"mean absolute error : {mae}")
rmse = np.sqrt(mse)
print(f"Root mean squared error : {rmse}")
r2score =r2_score(ytrain,ytrain_pred)
print(f"R2 score: {r2score}")
mean squared error : 3796829.2994285715 mean absolute error : 1588.490476190476 Root mean squared error : 1948.5454317076037 R2 score: -0.7948788777442457
ytest_pred = log_reg_model.predict(xtest)
mse = mean_squared_error(ytest,ytest_pred)
print(f"mean squared error : {mse}")
mae = mean_absolute_error(ytest,ytest_pred)
print(f"mean absolute error : {mae}")
rmse = np.sqrt(mse)
print(f"Root mean squared error : {rmse}")
r2score =r2_score(ytest,ytest_pred)
print(f"R2 score: {r2score}")
mean squared error : 3812043.5486666667 mean absolute error : 1594.8197777777777 Root mean squared error : 1952.4455302688132 R2 score: -0.830701998967226
dt_reg = DecisionTreeRegressor()
dt_reg
DecisionTreeRegressor()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
DecisionTreeRegressor()
dt_reg_model = dt_reg.fit(xtrain,ytrain)
dt_reg_model
DecisionTreeRegressor()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
DecisionTreeRegressor()
ytrain_pred = dt_reg_model.predict(xtrain)
mse = mean_squared_error(ytrain,ytrain_pred)
print(f"mean squared error : {mse}")
mae = mean_absolute_error(ytrain,ytrain_pred)
print(f"mean absolute error : {mae}")
rmse = np.sqrt(mse)
print(f"Root mean squared error : {rmse}")
r2score =r2_score(ytrain,ytrain_pred)
print(f"R2 score: {r2score}")
mean squared error : 0.0 mean absolute error : 0.0 Root mean squared error : 0.0 R2 score: 1.0
ytest_pred = dt_reg_model.predict(xtest)
mse = mean_squared_error(ytest,ytest_pred)
print(f"mean squared error : {mse}")
mae = mean_absolute_error(ytest,ytest_pred)
print(f"mean absolute error : {mae}")
rmse = np.sqrt(mse)
print(f"Root mean squared error : {rmse}")
r2score =r2_score(ytest,ytest_pred)
print(f"R2 score: {r2score}")
mean squared error : 4294379.726666667 mean absolute error : 1696.8528888888889 Root mean squared error : 2072.288523991451 R2 score: -1.0623399102255227
hyperparameters = {
"criterion" : ["squared_error", "absolute_error", "friedman_mse", "poisson"],
"max_depth" : np.arange(2,50),
"min_samples_split": np.arange(2,10),
"min_samples_leaf" : np.arange(2,15)
}
print(hyperparameters)
{'criterion': ['squared_error', 'absolute_error', 'friedman_mse', 'poisson'], 'max_depth': array([ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]), 'min_samples_split': array([2, 3, 4, 5, 6, 7, 8, 9]), 'min_samples_leaf': array([ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])}
rscv = RandomizedSearchCV(dt_reg_model, hyperparameters, cv=5) #cv : cross validation
rscv
RandomizedSearchCV(cv=5, estimator=DecisionTreeRegressor(),
param_distributions={'criterion': ['squared_error',
'absolute_error',
'friedman_mse',
'poisson'],
'max_depth': array([ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]),
'min_samples_leaf': array([ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
'min_samples_split': array([2, 3, 4, 5, 6, 7, 8, 9])})In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. RandomizedSearchCV(cv=5, estimator=DecisionTreeRegressor(),
param_distributions={'criterion': ['squared_error',
'absolute_error',
'friedman_mse',
'poisson'],
'max_depth': array([ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]),
'min_samples_leaf': array([ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
'min_samples_split': array([2, 3, 4, 5, 6, 7, 8, 9])})DecisionTreeRegressor()
DecisionTreeRegressor()
rscv_reg = rscv.fit(xtrain, ytrain)
rscv_reg
RandomizedSearchCV(cv=5, estimator=DecisionTreeRegressor(),
param_distributions={'criterion': ['squared_error',
'absolute_error',
'friedman_mse',
'poisson'],
'max_depth': array([ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]),
'min_samples_leaf': array([ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
'min_samples_split': array([2, 3, 4, 5, 6, 7, 8, 9])})In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. RandomizedSearchCV(cv=5, estimator=DecisionTreeRegressor(),
param_distributions={'criterion': ['squared_error',
'absolute_error',
'friedman_mse',
'poisson'],
'max_depth': array([ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]),
'min_samples_leaf': array([ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
'min_samples_split': array([2, 3, 4, 5, 6, 7, 8, 9])})DecisionTreeRegressor()
DecisionTreeRegressor()
rscv_reg.best_estimator_
DecisionTreeRegressor(max_depth=3, min_samples_leaf=7, min_samples_split=7)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
DecisionTreeRegressor(max_depth=3, min_samples_leaf=7, min_samples_split=7)
rscv_reg_hyp = DecisionTreeRegressor(criterion='poisson', max_depth=10, min_samples_leaf=3,
min_samples_split=7)
rscv_reg_hyp
DecisionTreeRegressor(criterion='poisson', max_depth=10, min_samples_leaf=3,
min_samples_split=7)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. DecisionTreeRegressor(criterion='poisson', max_depth=10, min_samples_leaf=3,
min_samples_split=7)dt_reg_hyp_model = rscv_reg_hyp.fit(xtrain,ytrain)
dt_reg_hyp_model
DecisionTreeRegressor(criterion='poisson', max_depth=10, min_samples_leaf=3,
min_samples_split=7)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. DecisionTreeRegressor(criterion='poisson', max_depth=10, min_samples_leaf=3,
min_samples_split=7)ytrain_pred = dt_reg_hyp_model.predict(xtrain)
mse = mean_squared_error(ytrain,ytrain_pred)
print(f"mean squared error : {mse}")
mae = mean_absolute_error(ytrain,ytrain_pred)
print(f"mean absolute error : {mae}")
rmse = np.sqrt(mse)
print(f"Root mean squared error : {rmse}")
r2score =r2_score(ytrain,ytrain_pred)
print(f"R2 score: {r2score}")
mean squared error : 1922712.719504361 mean absolute error : 1177.1810715617278 Root mean squared error : 1386.619168879603 R2 score: 0.09107411051427561
ytest_pred = dt_reg_hyp_model.predict(xtest)
mse = mean_squared_error(ytest,ytest_pred)
print(f"mean squared error : {mse}")
mae = mean_absolute_error(ytest,ytest_pred)
print(f"mean absolute error : {mae}")
rmse = np.sqrt(mse)
print(f"Root mean squared error : {rmse}")
r2score =r2_score(ytest,ytest_pred)
print(f"R2 score: {r2score}")
mean squared error : 2294414.3971356065 mean absolute error : 1297.390343694187 Root mean squared error : 1514.7324506775467 R2 score: -0.10187330487462609
plot_tree(dt_reg_hyp_model, feature_names=df.columns,class_names= list("ABCDEF"), filled=True)
plt.savefig("dt_hyp_model.png")
rf_reg = RandomForestRegressor()
rf_reg
RandomForestRegressor()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
RandomForestRegressor()
rf_reg_model = rf_reg.fit(xtrain,ytrain)
rf_reg_model
RandomForestRegressor()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
RandomForestRegressor()
ytrain_pred = rf_reg_model.predict(xtrain)
mse = mean_squared_error(ytrain,ytrain_pred)
print(f"mean squared error : {mse}")
mae = mean_absolute_error(ytrain,ytrain_pred)
print(f"mean absolute error : {mae}")
rmse = np.sqrt(mse)
print(f"Root mean squared error : {rmse}")
r2score =r2_score(ytrain,ytrain_pred)
print(f"R2 score: {r2score}")
mean squared error : 300817.0957437714 mean absolute error : 468.8258590476191 Root mean squared error : 548.4679532513923 R2 score: 0.8577944361901852
ytest_pred = rf_reg_model.predict(xtest)
mse = mean_squared_error(ytest,ytest_pred)
print(f"mean squared error : {mse}")
mae = mean_absolute_error(ytest,ytest_pred)
print(f"mean absolute error : {mae}")
rmse = np.sqrt(mse)
print(f"Root mean squared error : {rmse}")
r2score =r2_score(ytest,ytest_pred)
print(f"R2 score: {r2score}")
mean squared error : 2141911.119747 mean absolute error : 1265.6518688888889 Root mean squared error : 1463.526945343679 R2 score: -0.028634882700244946
hyperparameters = {
"n_estimators" : np.arange(2,50),
"criterion" : ["squared_error", "absolute_error", "friedman_mse", "poisson"],
"max_depth" : np.arange(2,50),
"min_samples_split": np.arange(2,10),
"min_samples_leaf" : np.arange(2,15)
}
print(hyperparameters)
{'n_estimators': array([ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]), 'criterion': ['squared_error', 'absolute_error', 'friedman_mse', 'poisson'], 'max_depth': array([ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]), 'min_samples_split': array([2, 3, 4, 5, 6, 7, 8, 9]), 'min_samples_leaf': array([ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])}
rscv_rf = RandomizedSearchCV(rf_reg_model, hyperparameters, cv=5)
rscv_rf
RandomizedSearchCV(cv=5, estimator=RandomForestRegressor(),
param_distributions={'criterion': ['squared_error',
'absolute_error',
'friedman_mse',
'poisson'],
'max_depth': array([ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]),
'min_samples_leaf': array([ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
'min_samples_split': array([2, 3, 4, 5, 6, 7, 8, 9]),
'n_estimators': array([ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])})In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. RandomizedSearchCV(cv=5, estimator=RandomForestRegressor(),
param_distributions={'criterion': ['squared_error',
'absolute_error',
'friedman_mse',
'poisson'],
'max_depth': array([ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]),
'min_samples_leaf': array([ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
'min_samples_split': array([2, 3, 4, 5, 6, 7, 8, 9]),
'n_estimators': array([ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])})RandomForestRegressor()
RandomForestRegressor()
rscv_rf_reg = rscv_reg.fit(xtrain,ytrain)
rscv_rf_reg
RandomizedSearchCV(cv=5, estimator=DecisionTreeRegressor(),
param_distributions={'criterion': ['squared_error',
'absolute_error',
'friedman_mse',
'poisson'],
'max_depth': array([ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]),
'min_samples_leaf': array([ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
'min_samples_split': array([2, 3, 4, 5, 6, 7, 8, 9])})In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. RandomizedSearchCV(cv=5, estimator=DecisionTreeRegressor(),
param_distributions={'criterion': ['squared_error',
'absolute_error',
'friedman_mse',
'poisson'],
'max_depth': array([ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]),
'min_samples_leaf': array([ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
'min_samples_split': array([2, 3, 4, 5, 6, 7, 8, 9])})DecisionTreeRegressor()
DecisionTreeRegressor()
rscv_rf_reg.best_estimator_
DecisionTreeRegressor(criterion='friedman_mse', max_depth=7, min_samples_leaf=3,
min_samples_split=8)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. DecisionTreeRegressor(criterion='friedman_mse', max_depth=7, min_samples_leaf=3,
min_samples_split=8)rscv_rf_reg = DecisionTreeRegressor(criterion='poisson', max_depth=3, min_samples_leaf=8,
min_samples_split=8)
rscv_rf_reg
DecisionTreeRegressor(criterion='poisson', max_depth=3, min_samples_leaf=8,
min_samples_split=8)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. DecisionTreeRegressor(criterion='poisson', max_depth=3, min_samples_leaf=8,
min_samples_split=8)rscv_rf_reg_model = rscv_rf_reg.fit(xtrain,ytrain)
ytrain_pred = rscv_rf_reg_model.predict(xtrain)
mse = mean_squared_error(ytrain,ytrain_pred)
print(f"mean squared error : {mse}")
mae = mean_absolute_error(ytrain,ytrain_pred)
print(f"mean absolute error : {mae}")
rmse = np.sqrt(mse)
print(f"Root mean squared error : {rmse}")
r2score =r2_score(ytrain,ytrain_pred)
print(f"R2 score: {r2score}")
mean squared error : 2103156.847674949 mean absolute error : 1255.1765315363575 Root mean squared error : 1450.2264815107153 R2 score: 0.005772578966594999
ytest_pred = rscv_rf_reg_model.predict(xtest)
mse = mean_squared_error(ytest,ytest_pred)
print(f"mean squared error : {mse}")
mae = mean_absolute_error(ytest,ytest_pred)
print(f"mean absolute error : {mae}")
rmse = np.sqrt(mse)
print(f"Root mean squared error : {rmse}")
r2score =r2_score(ytest,ytest_pred)
print(f"R2 score: {r2score}")
mean squared error : 2090347.9410888588 mean absolute error : 1251.2156414210908 Root mean squared error : 1445.8035624139466 R2 score: -0.0038720978481199264
adb_reg = AdaBoostRegressor()
adb_reg_model = adb_reg.fit(xtrain,ytrain)
adb_reg_model
AdaBoostRegressor()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
AdaBoostRegressor()
ytrain_pred = adb_reg_model.predict(xtrain)
mse = mean_squared_error(ytrain,ytrain_pred)
print(f"mean squared error : {mse}")
mae = mean_absolute_error(ytrain,ytrain_pred)
print(f"mean absolute error : {mae}")
rmse = np.sqrt(mse)
print(f"Root mean squared error : {rmse}")
r2score =r2_score(ytrain,ytrain_pred)
print(f"R2 score: {r2score}")
mean squared error : 2111358.455553671 mean absolute error : 1259.2899533595385 Root mean squared error : 1453.051429080771 R2 score: 0.0018954247454995299
ytest_pred = adb_reg_model.predict(xtest)
mse = mean_squared_error(ytest,ytest_pred)
print(f"mean squared error : {mse}")
mae = mean_absolute_error(ytest,ytest_pred)
print(f"mean absolute error : {mae}")
rmse = np.sqrt(mse)
print(f"Root mean squared error : {rmse}")
r2score =r2_score(ytest,ytest_pred)
print(f"R2 score: {r2score}")
mean squared error : 2083499.1601479966 mean absolute error : 1250.8864015602019 Root mean squared error : 1443.4331159246683 R2 score: -0.0005830281407945836
hyp = {"n_estimators" : np.arange(2,100),
"learning_rate" : [0, 0.1, 0.001, 0.0001, 1]}
RandomizedSearchCV(adb_reg_model, hyp, cv=5).fit(xtrain,ytrain).best_estimator_
C:\Users\prajw\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py:378: FitFailedWarning:
10 fits failed out of a total of 50.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.
Below are more details about the failures:
--------------------------------------------------------------------------------
10 fits failed with the following error:
Traceback (most recent call last):
File "C:\Users\prajw\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
estimator.fit(X_train, y_train, **fit_params)
File "C:\Users\prajw\anaconda3\lib\site-packages\sklearn\ensemble\_weight_boosting.py", line 124, in fit
self._validate_params()
File "C:\Users\prajw\anaconda3\lib\site-packages\sklearn\base.py", line 581, in _validate_params
validate_parameter_constraints(
File "C:\Users\prajw\anaconda3\lib\site-packages\sklearn\utils\_param_validation.py", line 97, in validate_parameter_constraints
raise InvalidParameterError(
sklearn.utils._param_validation.InvalidParameterError: The 'learning_rate' parameter of AdaBoostRegressor must be a float in the range (0, inf). Got 0 instead.
warnings.warn(some_fits_failed_message, FitFailedWarning)
C:\Users\prajw\anaconda3\lib\site-packages\sklearn\model_selection\_search.py:952: UserWarning: One or more of the test scores are non-finite: [-0.0017325 -0.00123039 -0.0015858 -0.00018256 nan -0.000965
nan -0.00167245 -0.00123635 -0.0010247 ]
warnings.warn(
AdaBoostRegressor(learning_rate=0.1, n_estimators=8)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
AdaBoostRegressor(learning_rate=0.1, n_estimators=8)
adb_reg_hyp = AdaBoostRegressor(learning_rate=0.1, n_estimators=76)
adb_reg_hyp_model = adb_reg_hyp.fit(xtrain, ytrain)
adb_reg_hyp_model
AdaBoostRegressor(learning_rate=0.1, n_estimators=76)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
AdaBoostRegressor(learning_rate=0.1, n_estimators=76)
ytrain_pred = adb_reg_hyp_model.predict(xtrain)
mse = mean_squared_error(ytrain,ytrain_pred)
print(f"mean squared error : {mse}")
mae = mean_absolute_error(ytrain,ytrain_pred)
print(f"mean absolute error : {mae}")
rmse = np.sqrt(mse)
print(f"Root mean squared error : {rmse}")
r2score =r2_score(ytrain,ytrain_pred)
print(f"R2 score: {r2score}")
mean squared error : 2109379.248700918 mean absolute error : 1258.6591436938827 Root mean squared error : 1452.3702175068581 R2 score: 0.002831057162586914
ytest_pred = adb_reg_hyp_model.predict(xtest)
mse = mean_squared_error(ytest,ytest_pred)
print(f"mean squared error : {mse}")
mae = mean_absolute_error(ytest,ytest_pred)
print(f"mean absolute error : {mae}")
rmse = np.sqrt(mse)
print(f"Root mean squared error : {rmse}")
r2score =r2_score(ytest,ytest_pred)
print(f"R2 score: {r2score}")
mean squared error : 2084796.4621296413 mean absolute error : 1251.2237551578348 Root mean squared error : 1443.882426698809 R2 score: -0.001206046556177398
gdb_reg_model = GradientBoostingRegressor().fit(xtrain, ytrain)
gdb_reg_model
GradientBoostingRegressor()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
GradientBoostingRegressor()
ytrain_pred = gdb_reg_model.predict(xtrain)
mse = mean_squared_error(ytrain,ytrain_pred)
print(f"mean squared error : {mse}")
mae = mean_absolute_error(ytrain,ytrain_pred)
print(f"mean absolute error : {mae}")
rmse = np.sqrt(mse)
print(f"Root mean squared error : {rmse}")
r2score =r2_score(ytrain,ytrain_pred)
print(f"R2 score: {r2score}")
mean squared error : 2001201.1718399662 mean absolute error : 1223.691900725033 Root mean squared error : 1414.6381770049775 R2 score: 0.05397018665201092
ytest_pred = gdb_reg_model.predict(xtest)
mse = mean_squared_error(ytest,ytest_pred)
print(f"mean squared error : {mse}")
mae = mean_absolute_error(ytest,ytest_pred)
print(f"mean absolute error : {mae}")
rmse = np.sqrt(mse)
print(f"Root mean squared error : {rmse}")
r2score =r2_score(ytest,ytest_pred)
print(f"R2 score: {r2score}")
mean squared error : 2095747.1312670347 mean absolute error : 1252.2144979335435 Root mean squared error : 1447.6695518201088 R2 score: -0.006465013727963154
hyp = {"n_estimators" : np.arange(2,100),
"learning_rate" : [0, 0.1, 0.001, 0.0001, 1]}
RandomizedSearchCV(gdb_reg_model, hyp, cv=5).fit(xtrain, ytrain).best_estimator_
GradientBoostingRegressor(learning_rate=0.001, n_estimators=37)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
GradientBoostingRegressor(learning_rate=0.001, n_estimators=37)
gdb_reg_hyp_model = GradientBoostingRegressor(learning_rate=0.001, n_estimators=42).fit(xtrain, ytrain)
gdb_reg_hyp_model
GradientBoostingRegressor(learning_rate=0.001, n_estimators=42)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
GradientBoostingRegressor(learning_rate=0.001, n_estimators=42)
ytrain_pred = gdb_reg_hyp_model.predict(xtrain)
mse = mean_squared_error(ytrain,ytrain_pred)
print(f"mean squared error : {mse}")
mae = mean_absolute_error(ytrain,ytrain_pred)
print(f"mean absolute error : {mae}")
rmse = np.sqrt(mse)
print(f"Root mean squared error : {rmse}")
r2score =r2_score(ytrain,ytrain_pred)
print(f"R2 score: {r2score}")
mean squared error : 2114316.2139810575 mean absolute error : 1260.159652620357 Root mean squared error : 1454.0688477445135 R2 score: 0.0004972006726960965
ytest_pred = gdb_reg_hyp_model.predict(xtest)
mse = mean_squared_error(ytest,ytest_pred)
print(f"mean squared error : {mse}")
mae = mean_absolute_error(ytest,ytest_pred)
print(f"mean absolute error : {mae}")
rmse = np.sqrt(mse)
print(f"Root mean squared error : {rmse}")
r2score =r2_score(ytest,ytest_pred)
print(f"R2 score: {r2score}")
mean squared error : 2084210.8770516566 mean absolute error : 1251.0061098955455 Root mean squared error : 1443.6796310302561 R2 score: -0.0009248242251242988
svc_model = SVC().fit(xtrain,ytrain)
svc_model
SVC()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
SVC()
ytrain_pred = svc_model.predict(xtrain)
mse = mean_squared_error(ytrain,ytrain_pred)
print(f"mean squared error : {mse}")
mae = mean_absolute_error(ytrain,ytrain_pred)
print(f"mean absolute error : {mae}")
rmse = np.sqrt(mse)
print(f"Root mean squared error : {rmse}")
r2score =r2_score(ytrain,ytrain_pred)
print(f"R2 score: {r2score}")
ytest_pred = svc_model.predict(xtest)
mse = mean_squared_error(ytest,ytest_pred)
print(f"mean squared error : {mse}")
mae = mean_absolute_error(ytest,ytest_pred)
print(f"mean absolute error : {mae}")
rmse = np.sqrt(mse)
print(f"Root mean squared error : {rmse}")
r2score =r2_score(ytest,ytest_pred)
print(f"R2 score: {r2score}")
hyp = {"kernel" : ["linear", "rbf"], "C" : np.arange(2,15)}
hyp
svc_hyp = SVC(kernel = "linear", C =7)
svc_hyp_model = svc_hyp.fit(xtrain,ytrain)
svc_hyp_model
ytrain_pred = svc_hyp_model.predict(xtrain)
mse = mean_squared_error(ytrain,ytrain_pred)
print(f"mean squared error : {mse}")
mae = mean_absolute_error(ytrain,ytrain_pred)
print(f"mean absolute error : {mae}")
rmse = np.sqrt(mse)
print(f"Root mean squared error : {rmse}")
r2score =r2_score(ytrain,ytrain_pred)
print(f"R2 score: {r2score}")
## model Evaluation for Testing
ytest_pred = svc_hyp_model.predict(xtest)
mse = mean_squared_error(ytest,ytest_pred)
print(f"mean squared error : {mse}")
mae = mean_absolute_error(ytest,ytest_pred)
print(f"mean absolute error : {mae}")
rmse = np.sqrt(mse)
print(f"Root mean squared error : {rmse}")
r2score =r2_score(ytest,ytest_p